In [1]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
- `X`: data, 2d numpy array or scipy sparse matrix of shape `(n_samples, n_features)`
- `y`: targets, 1d numpy array of shape `(n_samples,)`
| ``model.fit(X_train, [y_train])`` | |
|---|---|
| ``model.predict(X_test)`` | ``model.transform(X_test)`` |
| Classification | Preprocessing |
| Regression | Dimensionality Reduction |
| Clustering | Feature Extraction |
| | Feature selection |
In [2]:
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
# `sklearn.cross_validation` was removed from scikit-learn; the
# cross-validation helpers now live in `sklearn.model_selection`.
from sklearn.model_selection import cross_val_score

digits = load_digits()
# Divide by 16 to rescale the features (per the load_digits documentation the
# pixel intensities are integers in 0..16, so this maps them into [0, 1]).
X, y = digits.data / 16., digits.target

# 5-fold cross-validation of a default LogisticRegression; the cell displays
# the array of per-fold scores.
cross_val_score(LogisticRegression(), X, y, cv=5)
Out[2]:
In [3]:
# `sklearn.grid_search` and `sklearn.cross_validation` were removed from
# scikit-learn; both helpers are now provided by `sklearn.model_selection`.
from sklearn.model_selection import GridSearchCV, train_test_split

# Hold out a test set. A fixed random_state makes the split (and therefore the
# reported score) reproducible under Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Tune the regularisation strength C over six log-spaced values, 1e-3 .. 1e2.
grid = GridSearchCV(LogisticRegression(), param_grid={'C': np.logspace(-3, 2, 6)})
grid.fit(X_train, y_train)

# Score of the tuned model on the held-out data (displayed as the cell output).
grid.score(X_test, y_test)
Out[3]:
In [4]:
from sklearn.feature_selection import SelectKBest
from sklearn.pipeline import make_pipeline

# Chain univariate feature selection (keep the k=59 best-scoring features)
# with a default logistic regression so both steps are fitted together.
pipe = make_pipeline(
    SelectKBest(k=59),
    LogisticRegression(),
)

# fit() returns the fitted pipeline itself, so the held-out score can be
# requested on the same expression; the score is the cell's displayed output.
pipe.fit(X_train, y_train).score(X_test, y_test)
Out[4]:
In [5]:
# One-vs-rest task: the target is the boolean mask "label equals 3".
# No `scoring` is given, so the estimator's default scorer is used.
cross_val_score(
    estimator=LogisticRegression(C=0.01),
    X=X,
    y=(y == 3),
    cv=5,
)
Out[5]:
In [6]:
# Same "label equals 3" task and model, but each fold is evaluated with
# ROC AUC instead of the estimator's default scorer.
cross_val_score(
    estimator=LogisticRegression(C=0.01),
    X=X,
    y=(y == 3),
    cv=5,
    scoring="roc_auc",
)
Out[6]:
In [7]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Toy data: column 0 is a numeric feature, column 1 is an integer city code.
# NOTE: this cell rebinds X and y, replacing the digits data loaded earlier.
X = np.array([
    [15.9, 1],  # from Tokyo
    [21.5, 2],  # from New York
    [31.3, 0],  # from Paris
    [25.1, 2],  # from New York
    [63.6, 1],  # from Tokyo
    [14.4, 1],  # from Tokyo
])
y = np.array([0, 1, 1, 1, 0, 0])

# OneHotEncoder's `categorical_features` argument was removed from
# scikit-learn (and `sparse` was later renamed), so column selection is done
# with a ColumnTransformer instead: one-hot encode column 1, pass the other
# column through unchanged, and force a dense result with sparse_threshold=0.
# The encoded columns come first, followed by the passthrough column.
encoder = ColumnTransformer(
    [("city", OneHotEncoder(), [1])],
    remainder="passthrough",
    sparse_threshold=0,
)
pipe = make_pipeline(encoder, LogisticRegression())
pipe.fit(X, y)

# Accuracy on the same six samples the pipeline was fitted on.
pipe.score(X, y)
Out[7]: